'''
Crawl the listing pages of a Baidu Tieba forum and save each page as an HTML file.
'''
import requests
class Webspider:
    """Download pages for a Baidu Tieba forum name and save them as HTML files.

    Instance attributes (set in ``__init__``):
        headers: HTTP headers sent with every request (a desktop Chrome
            User-Agent string).
        url_temp: URL template containing a single ``{}`` placeholder that
            is filled with the ``pn`` offset of each page.
        name: The forum name exactly as passed in; also used as the prefix
            of every output file name.
    """

    # Step between the ``pn`` query values of two consecutive pages.
    PAGE_SIZE = 50
    # Number of pages fetched when the caller does not ask for another count.
    DEFAULT_PAGE_COUNT = 50

    def __init__(self, name):
        """Store the forum name and build the request headers and URL template.

        Args:
            name: Forum name to insert into the request URL.
        """
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
        # Literal braces in the name must be doubled, otherwise the later
        # ``str.format`` call would treat them as replacement fields and
        # raise IndexError/KeyError.
        escaped_name = name.replace("{", "{{").replace("}", "}}")
        # NOTE(review): the ``index.html?traceid=`` endpoint is kept as-is;
        # confirm it is the intended listing URL for a forum.
        self.url_temp = (
            "https://tieba.baidu.com/index.html?traceid="
            + escaped_name
            + "&ie=utf-8&pn={}"
        )
        self.name = name

    def get_url_list(self, page_count=DEFAULT_PAGE_COUNT):
        """Return the URLs of the first ``page_count`` pages.

        Args:
            page_count: How many page URLs to generate (default 50).

        Returns:
            A list of URL strings whose ``pn`` values are
            ``0, PAGE_SIZE, 2 * PAGE_SIZE, ...``.
        """
        return [
            self.url_temp.format(page_index * self.PAGE_SIZE)
            for page_index in range(page_count)
        ]

    def parse_url(self, url, timeout=10):
        """Fetch ``url`` and return the response body decoded as UTF-8 text.

        The URL is printed before the request is sent.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the crawl forever.

        Returns:
            The response body as ``str``.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        print(url)
        response = requests.get(url, headers=self.headers, timeout=timeout)
        return response.content.decode()

    def run(self, page_count=DEFAULT_PAGE_COUNT):
        """Download every page and write each one to its own HTML file.

        Files are created in the current working directory and named
        ``<name>第<page number>页.html``, with page numbers starting at 1.

        Args:
            page_count: How many pages to download (default 50).
        """
        # enumerate supplies the 1-based page number directly instead of
        # searching the URL list for the current URL on every iteration.
        for page_num, url in enumerate(self.get_url_list(page_count), start=1):
            html_str = self.parse_url(url)
            file_name = self.name + '第{}页.html'.format(page_num)
            with open(file_name, "w", encoding="utf-8") as f:
                f.write(html_str)
if __name__ == '__main__':
    # Script entry point: read the forum name from stdin, then crawl it
    # and save the downloaded pages.
    forum_name = input("请输入贴吧名字：")
    spider = Webspider(forum_name)
    spider.run()






